{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\anaconda\\lib\\site-packages\\sklearn\\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import os\n",
    "import codecs\n",
    "from sklearn.cross_validation import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "seed = 1024\n",
    "np.random.seed(seed)\n",
    "\n",
    "path = '../data/'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Join with os.path.join so the load still works if `path` has no trailing separator.\n",
    "# NOTE(review): unpickling can execute arbitrary code; only load a corpus.pkl you trust.\n",
    "corpus = pd.read_pickle(os.path.join(path, 'corpus.pkl'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Hold out 20% of the corpus, stratified on the label column.\n",
    "y = corpus['label']\n",
    "train, test, train_y, test_y = train_test_split(\n",
    "    corpus,\n",
    "    y,\n",
    "    test_size=0.2,\n",
    "    random_state=seed,\n",
    "    stratify=y,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Halve the 20% hold-out into `valid` and `dev`, again stratified on the label.\n",
    "valid, dev, valid_y, dev_y = train_test_split(\n",
    "    test,\n",
    "    test_y,\n",
    "    test_size=0.5,\n",
    "    random_state=seed,\n",
    "    stratify=test_y,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((24736, 4), (3092, 4), (3092, 4))"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Shapes of the three splits, in the order (train, valid, dev).\n",
    "tuple(split.shape for split in (train, valid, dev))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "train samples percentange of corpus: 80.0%\n",
      "postive samples percentange of train: 50.04%\n",
      "postive samples percentange of valid: 50.03%\n",
      "postive samples percentange of dev: 50.03%\n"
     ]
    }
   ],
   "source": [
    "print('train samples percentange of corpus: {}%'.format(round((train.shape[0]/corpus.shape[0]) * 100,2)))\n",
    "print('postive samples percentange of train: {}%'.format(round(train['label'].mean() * 100,2)))\n",
    "print('postive samples percentange of valid: {}%'.format(round(valid['label'].mean() * 100,2)))\n",
    "print('postive samples percentange of dev: {}%'.format(round(dev['label'].mean() * 100,2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>context</th>\n",
       "      <th>field</th>\n",
       "      <th>label</th>\n",
       "      <th>lan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>6734</th>\n",
       "      <td>\"I have seen every episode of this spin off. I...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>408</th>\n",
       "      <td>\"Like most people I love \\\"A Christmas Story\\\"...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>959</th>\n",
       "      <td>房间很小，有点像招待所.............四星...............\\r\\r\\...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18375</th>\n",
       "      <td>\"... or maybe it just IS this bad. The plot is...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5323</th>\n",
       "      <td>\"It's a good movie maybe I like it because it ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9004</th>\n",
       "      <td>\"If you are a fan of really bad movies, and yo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>256</th>\n",
       "      <td>\"I just got back from this free screening, and...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10296</th>\n",
       "      <td>\"D.W. Griffith could have made any film he wan...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13183</th>\n",
       "      <td>\"This movie contains one of Richard Dreyfuss's...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13054</th>\n",
       "      <td>\"The Deadly Wake is THE PERFECT MOVIE for film...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9457</th>\n",
       "      <td>\"Magicians is a wonderful ride from start to f...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>938</th>\n",
       "      <td>synopsis : a maniac , crazed by virulent micro...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14122</th>\n",
       "      <td>\"This is a wonderful movie about a brothel in ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3331</th>\n",
       "      <td>\"MGM were unsure of how to market Garbo when s...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>898</th>\n",
       "      <td>\"This film provides us with an interesting rem...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>889</th>\n",
       "      <td>价格在这个地段属于适中, 附近有早餐店,小饭店, 比较方便,无早也无所\\r\\r\\n\\r\\r\\...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24040</th>\n",
       "      <td>\"Why does this have such a low rating? I reall...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18583</th>\n",
       "      <td>\"This is a complex documentary that shows many...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>347</th>\n",
       "      <td>if you have ever seen fox tv's special car cha...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8654</th>\n",
       "      <td>\"This film is really bad. It maybe harsh, but ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5248</th>\n",
       "      <td>\"BABY FACE is one of the better of the \\\"forgo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20162</th>\n",
       "      <td>\"Carlos Mencia was excellent this is hour spec...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>533</th>\n",
       "      <td>capsule : trippy , hyperspeed action machine f...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1492</th>\n",
       "      <td>上次去住想起来还是很不高兴,根本没有星,好旧的酒店,除了房间大,东西旧得吓人,毛巾在面盆里洗...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3043</th>\n",
       "      <td>\"An absolutely wretched waste of film!! Nothin...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24131</th>\n",
       "      <td>\"Enterprise, the latest high budget spin-off t...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>554</th>\n",
       "      <td>还不错的酒店，虽然有些旧，日方也撤资了。可能南京本来也不大的原因吧，从酒店去新街口等地方都不...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>876</th>\n",
       "      <td>真不象是一家四星级的酒店，评三星都觉得高，房间很小并且很旧，到处都觉得乱糟糟的，一楼大堂很吵...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23858</th>\n",
       "      <td>\"Okay. As you can see this is one of my favori...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7140</th>\n",
       "      <td>\"Dull haunted house thriller finds an American...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6874</th>\n",
       "      <td>\"This may just be the most nostalgic journey b...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2769</th>\n",
       "      <td>\"I'm not a stage purist. A movie could have be...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11630</th>\n",
       "      <td>\"It's only 2 episodes into a 5 part drama, but...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2930</th>\n",
       "      <td>\"I have given this film an elevated rating of ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23919</th>\n",
       "      <td>\"Nothing could have saved this movie, not even...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23668</th>\n",
       "      <td>\"Writer-director Tony Piccirillo adapted his o...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>293</th>\n",
       "      <td>我住的是6号房，无窗，像住地下室；而且因此酒店地处繁华地段经营业务也很多，电梯里遇见的人很杂...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7678</th>\n",
       "      <td>\"Three years ago, Rachel(Therese Fretwell) was...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18075</th>\n",
       "      <td>\"I went into this movie perhaps a bit jaded by...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7888</th>\n",
       "      <td>\"\\\"Ask the Dust\\\" looked intriguing from the t...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15619</th>\n",
       "      <td>\"the movie is far more sophisticated and intel...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13898</th>\n",
       "      <td>\"Utterly predictable silly show about a man wh...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2405</th>\n",
       "      <td>\"This has to be one of the best, if not the be...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10307</th>\n",
       "      <td>\"it got switched off before the opening credit...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17838</th>\n",
       "      <td>\"There are two things that I like about Elvira...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1639</th>\n",
       "      <td>服务态度很好不过似乎水平比不上态度，checkin&amp;out永远是大家抱怨的焦点。据说总部规定...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7179</th>\n",
       "      <td>\"There are no spoilers in this review. There's...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20916</th>\n",
       "      <td>\"Celia Johnson is good as the Nurse. Michael H...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16215</th>\n",
       "      <td>\"This U.S soap opera, 'Knots Landing' has all ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10800</th>\n",
       "      <td>\"This is a movie which attempts a retelling of...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7222</th>\n",
       "      <td>\"As with a bunch of guys at school we must giv...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1619</th>\n",
       "      <td>广大携程会员请注意:此酒店旁边在造地铁之类的施工,有噪音和灰尘.等完工还有一段日期.出门叫出...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2846</th>\n",
       "      <td>\"You do not get more dark or tragic than \\\"Oth...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4213</th>\n",
       "      <td>\"Well, at least my theater group did, lol. So ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6685</th>\n",
       "      <td>\"Once again Jet Li brings his charismatic pres...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18165</th>\n",
       "      <td>\"This show has come so far. At first EVERYONE ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21109</th>\n",
       "      <td>\"Citizen X tells the story of Andrei Chikatilo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13694</th>\n",
       "      <td>\"Steve Biko was a black activist who tried to ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7683</th>\n",
       "      <td>\"Scott Menville is not Casey Kasem. That is th...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6434</th>\n",
       "      <td>\"Quite possibly the nicest woman in show busin...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>24736 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 context  field  label lan\n",
       "6734   \"I have seen every episode of this spin off. I...  movie      0  en\n",
       "408    \"Like most people I love \\\"A Christmas Story\\\"...  movie      0  en\n",
       "959    房间很小，有点像招待所.............四星...............\\r\\r\\...  hotel      1  ch\n",
       "18375  \"... or maybe it just IS this bad. The plot is...  movie      0  en\n",
       "5323   \"It's a good movie maybe I like it because it ...  movie      1  en\n",
       "9004   \"If you are a fan of really bad movies, and yo...  movie      0  en\n",
       "256    \"I just got back from this free screening, and...  movie      0  en\n",
       "10296  \"D.W. Griffith could have made any film he wan...  movie      1  en\n",
       "13183  \"This movie contains one of Richard Dreyfuss's...  movie      1  en\n",
       "13054  \"The Deadly Wake is THE PERFECT MOVIE for film...  movie      0  en\n",
       "9457   \"Magicians is a wonderful ride from start to f...  movie      1  en\n",
       "938    synopsis : a maniac , crazed by virulent micro...  movie      0  en\n",
       "14122  \"This is a wonderful movie about a brothel in ...  movie      1  en\n",
       "3331   \"MGM were unsure of how to market Garbo when s...  movie      1  en\n",
       "898    \"This film provides us with an interesting rem...  movie      1  en\n",
       "889    价格在这个地段属于适中, 附近有早餐店,小饭店, 比较方便,无早也无所\\r\\r\\n\\r\\r\\...  hotel      1  ch\n",
       "24040  \"Why does this have such a low rating? I reall...  movie      1  en\n",
       "18583  \"This is a complex documentary that shows many...  movie      1  en\n",
       "347    if you have ever seen fox tv's special car cha...  movie      0  en\n",
       "8654   \"This film is really bad. It maybe harsh, but ...  movie      0  en\n",
       "5248   \"BABY FACE is one of the better of the \\\"forgo...  movie      1  en\n",
       "20162  \"Carlos Mencia was excellent this is hour spec...  movie      1  en\n",
       "533    capsule : trippy , hyperspeed action machine f...  movie      1  en\n",
       "1492   上次去住想起来还是很不高兴,根本没有星,好旧的酒店,除了房间大,东西旧得吓人,毛巾在面盆里洗...  hotel      0  ch\n",
       "3043   \"An absolutely wretched waste of film!! Nothin...  movie      0  en\n",
       "24131  \"Enterprise, the latest high budget spin-off t...  movie      1  en\n",
       "554    还不错的酒店，虽然有些旧，日方也撤资了。可能南京本来也不大的原因吧，从酒店去新街口等地方都不...  hotel      1  ch\n",
       "876    真不象是一家四星级的酒店，评三星都觉得高，房间很小并且很旧，到处都觉得乱糟糟的，一楼大堂很吵...  hotel      0  ch\n",
       "23858  \"Okay. As you can see this is one of my favori...  movie      1  en\n",
       "7140   \"Dull haunted house thriller finds an American...  movie      0  en\n",
       "...                                                  ...    ...    ...  ..\n",
       "6874   \"This may just be the most nostalgic journey b...  movie      1  en\n",
       "2769   \"I'm not a stage purist. A movie could have be...  movie      0  en\n",
       "11630  \"It's only 2 episodes into a 5 part drama, but...  movie      1  en\n",
       "2930   \"I have given this film an elevated rating of ...  movie      0  en\n",
       "23919  \"Nothing could have saved this movie, not even...  movie      0  en\n",
       "23668  \"Writer-director Tony Piccirillo adapted his o...  movie      0  en\n",
       "293    我住的是6号房，无窗，像住地下室；而且因此酒店地处繁华地段经营业务也很多，电梯里遇见的人很杂...  hotel      0  ch\n",
       "7678   \"Three years ago, Rachel(Therese Fretwell) was...  movie      0  en\n",
       "18075  \"I went into this movie perhaps a bit jaded by...  movie      1  en\n",
       "7888   \"\\\"Ask the Dust\\\" looked intriguing from the t...  movie      0  en\n",
       "15619  \"the movie is far more sophisticated and intel...  movie      1  en\n",
       "13898  \"Utterly predictable silly show about a man wh...  movie      0  en\n",
       "2405   \"This has to be one of the best, if not the be...  movie      1  en\n",
       "10307  \"it got switched off before the opening credit...  movie      0  en\n",
       "17838  \"There are two things that I like about Elvira...  movie      1  en\n",
       "1639   服务态度很好不过似乎水平比不上态度，checkin&out永远是大家抱怨的焦点。据说总部规定...  hotel      1  ch\n",
       "7179   \"There are no spoilers in this review. There's...  movie      0  en\n",
       "20916  \"Celia Johnson is good as the Nurse. Michael H...  movie      0  en\n",
       "16215  \"This U.S soap opera, 'Knots Landing' has all ...  movie      0  en\n",
       "10800  \"This is a movie which attempts a retelling of...  movie      0  en\n",
       "7222   \"As with a bunch of guys at school we must giv...  movie      1  en\n",
       "1619   广大携程会员请注意:此酒店旁边在造地铁之类的施工,有噪音和灰尘.等完工还有一段日期.出门叫出...  hotel      0  ch\n",
       "2846   \"You do not get more dark or tragic than \\\"Oth...  movie      1  en\n",
       "4213   \"Well, at least my theater group did, lol. So ...  movie      0  en\n",
       "6685   \"Once again Jet Li brings his charismatic pres...  movie      1  en\n",
       "18165  \"This show has come so far. At first EVERYONE ...  movie      1  en\n",
       "21109  \"Citizen X tells the story of Andrei Chikatilo...  movie      1  en\n",
       "13694  \"Steve Biko was a black activist who tried to ...  movie      1  en\n",
       "7683   \"Scott Menville is not Casey Kasem. That is th...  movie      0  en\n",
       "6434   \"Quite possibly the nicest woman in show busin...  movie      0  en\n",
       "\n",
       "[24736 rows x 4 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the training split; pandas renders it truncated (24736 rows x 4 columns).\n",
    "# NOTE(review): train.head() would show the same columns with far less stored output.\n",
    "train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>context</th>\n",
       "      <th>field</th>\n",
       "      <th>label</th>\n",
       "      <th>lan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>23511</th>\n",
       "      <td>\"\\\"Igor and the Lunatics\\\" is a totally inept ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13999</th>\n",
       "      <td>\"This is a perfect series for family viewing. ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>894</th>\n",
       "      <td>i'm giving this stinker . \\nnormally , the wor...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1609</th>\n",
       "      <td>\"\\\"Kaabee\\\" depicts the hardship of a woman in...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21690</th>\n",
       "      <td>\"First off I'd like to say that if I had to ho...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14745</th>\n",
       "      <td>\"After a long period in the space, looking for...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22231</th>\n",
       "      <td>\"Not often have i had the feeling of a movie i...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11142</th>\n",
       "      <td>\"First things first, the female lead is too go...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>642</th>\n",
       "      <td>酒店的性价比很高.我特别要感谢早餐厅的服务员,8月2日早晨我吃完早餐将小包留在椅子上,很快服...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7849</th>\n",
       "      <td>\"Steven Spielberg produced, wrote, came up wit...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4567</th>\n",
       "      <td>\"This version of \\\"Moby Dick\\\" insults the aud...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>666</th>\n",
       "      <td>\"Never before have the motives of the producer...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5200</th>\n",
       "      <td>\"Now days, most people don't watch classic mov...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8854</th>\n",
       "      <td>\"Although it's not as creepy as it's cult clas...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>406</th>\n",
       "      <td>春节去住了一晚,进门就像到了一个大市场,晚上快12:00了,竟然还人声鼎沸,感觉很闹.房间进...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9267</th>\n",
       "      <td>\"I love this movie and have seen it quite a fe...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17378</th>\n",
       "      <td>\"I remember seeing this movie when I was about...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9432</th>\n",
       "      <td>\"Well, on the endless quest for horror, we wil...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19911</th>\n",
       "      <td>\"Back to the roots with \\\"like it is in heaven...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18425</th>\n",
       "      <td>\"This, the direct-to-video death rattle of the...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22536</th>\n",
       "      <td>\"A text prologue warns us that we should not a...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19150</th>\n",
       "      <td>\"The simple hand camera both gives some almost...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24020</th>\n",
       "      <td>\"I am not one of those people who just go onli...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1784</th>\n",
       "      <td>朋友推荐的酒店，位置不错，交通很方便，酒店的服务也很贴心，入住时前台适时地送上一杯冰柠檬茶，...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17215</th>\n",
       "      <td>\"Suggesting nothing less than a movie-length v...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24514</th>\n",
       "      <td>\"I am sorry to say that it was one of the wors...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10373</th>\n",
       "      <td>\"Not very interesting teen whodunit saved from...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1049</th>\n",
       "      <td>\"I was reviewing some old VHS tapes I have and...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9673</th>\n",
       "      <td>\"I must say this movie is a Mork and Mindy kno...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>843</th>\n",
       "      <td>the small-scale film , in limited release , \" ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1346</th>\n",
       "      <td>本来预定普通标间，到前台才知道没有房间了，故此升级到携程上没有的豪华大床房，房间不大但很干净...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>978</th>\n",
       "      <td>\" when you get out of jail , you can kill him...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1730</th>\n",
       "      <td>我喜歡這家酒店,很多年前住過,後來又住過其他的酒店,感覺還是這家好.覺得它性價比都不錯.尤其...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22607</th>\n",
       "      <td>\"I gotta be straight-up - I haven't seen a fil...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17870</th>\n",
       "      <td>\"A riotous farce set in the world of glamorous...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1847</th>\n",
       "      <td>6月17日入住两天，订的是半海景房。\\r\\r\\n去的那天下着雨，因为不熟悉，坐地铁到上环站后...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20752</th>\n",
       "      <td>\"***SPOILERS*** ***SPOILERS*** Well, seeing as...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22350</th>\n",
       "      <td>\"This film is one of those that has a resoundi...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22015</th>\n",
       "      <td>\"In my knowledge, Largo winch was a famous Bel...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3310</th>\n",
       "      <td>\"If you enjoy the original SNL cast and shows ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>86</th>\n",
       "      <td>酒店有异味，无空调，给人的整体感觉很破旧，根本不能称为四星\\r\\r\\n\\r\\r\\n\\r\\r\\...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7900</th>\n",
       "      <td>\"turned out to be another failed attempt by th...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18778</th>\n",
       "      <td>\"Is there any other time period that has been ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20130</th>\n",
       "      <td>\"\\\"Valentine\\\" is another horror movie to add ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1039</th>\n",
       "      <td>扬子岛酒店的位置很好，逛街、购物、吃饭、娱乐都很方便，酒店对面就是大都会和太平洋商场，背后就...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1848</th>\n",
       "      <td>看了大家的评价不错，才选择这家的。房间无论如何不能说是三星的标准，个人感觉不如如家布置得温馨...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20835</th>\n",
       "      <td>\"A hot-headed cop accidentally kills a murder ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16325</th>\n",
       "      <td>\"The annoying mouse and lullaby really got to ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8563</th>\n",
       "      <td>\"Get this film if at all possible. You will fi...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>\"Intrigued by the synopsis (every gay video th...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6852</th>\n",
       "      <td>\"Overlong drama that isn't capable of making a...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17099</th>\n",
       "      <td>\"Nina Foch insists that \\\"My Name is Julia Ros...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>zero effect gets its title from the main chara...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14902</th>\n",
       "      <td>\"This is without a doubt one of the best movie...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11327</th>\n",
       "      <td>\"The Master Blackmailer, based off of Sir Arth...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18777</th>\n",
       "      <td>\"At the time I am writing this I see out of ov...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10317</th>\n",
       "      <td>\"When THE MAGIC OF LASSIE opened at Radio City...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15595</th>\n",
       "      <td>\"Absence of a GOOD PLOT, absence of decent ACT...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2266</th>\n",
       "      <td>\"I loved this episode. It is so great that all...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19547</th>\n",
       "      <td>\"THE FOURTH MAN (Paul Verhoeven - Netherlands ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>6184 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 context  field  label lan\n",
       "23511  \"\\\"Igor and the Lunatics\\\" is a totally inept ...  movie      0  en\n",
       "13999  \"This is a perfect series for family viewing. ...  movie      1  en\n",
       "894    i'm giving this stinker . \\nnormally , the wor...  movie      0  en\n",
       "1609   \"\\\"Kaabee\\\" depicts the hardship of a woman in...  movie      1  en\n",
       "21690  \"First off I'd like to say that if I had to ho...  movie      0  en\n",
       "14745  \"After a long period in the space, looking for...  movie      0  en\n",
       "22231  \"Not often have i had the feeling of a movie i...  movie      1  en\n",
       "11142  \"First things first, the female lead is too go...  movie      1  en\n",
       "642    酒店的性价比很高.我特别要感谢早餐厅的服务员,8月2日早晨我吃完早餐将小包留在椅子上,很快服...  hotel      1  ch\n",
       "7849   \"Steven Spielberg produced, wrote, came up wit...  movie      1  en\n",
       "4567   \"This version of \\\"Moby Dick\\\" insults the aud...  movie      0  en\n",
       "666    \"Never before have the motives of the producer...  movie      0  en\n",
       "5200   \"Now days, most people don't watch classic mov...  movie      1  en\n",
       "8854   \"Although it's not as creepy as it's cult clas...  movie      1  en\n",
       "406    春节去住了一晚,进门就像到了一个大市场,晚上快12:00了,竟然还人声鼎沸,感觉很闹.房间进...  hotel      0  ch\n",
       "9267   \"I love this movie and have seen it quite a fe...  movie      1  en\n",
       "17378  \"I remember seeing this movie when I was about...  movie      1  en\n",
       "9432   \"Well, on the endless quest for horror, we wil...  movie      0  en\n",
       "19911  \"Back to the roots with \\\"like it is in heaven...  movie      1  en\n",
       "18425  \"This, the direct-to-video death rattle of the...  movie      0  en\n",
       "22536  \"A text prologue warns us that we should not a...  movie      0  en\n",
       "19150  \"The simple hand camera both gives some almost...  movie      1  en\n",
       "24020  \"I am not one of those people who just go onli...  movie      0  en\n",
       "1784   朋友推荐的酒店，位置不错，交通很方便，酒店的服务也很贴心，入住时前台适时地送上一杯冰柠檬茶，...  hotel      1  ch\n",
       "17215  \"Suggesting nothing less than a movie-length v...  movie      0  en\n",
       "24514  \"I am sorry to say that it was one of the wors...  movie      0  en\n",
       "10373  \"Not very interesting teen whodunit saved from...  movie      0  en\n",
       "1049   \"I was reviewing some old VHS tapes I have and...  movie      1  en\n",
       "9673   \"I must say this movie is a Mork and Mindy kno...  movie      0  en\n",
       "843    the small-scale film , in limited release , \" ...  movie      1  en\n",
       "...                                                  ...    ...    ...  ..\n",
       "1346   本来预定普通标间，到前台才知道没有房间了，故此升级到携程上没有的豪华大床房，房间不大但很干净...  hotel      1  ch\n",
       "978     \" when you get out of jail , you can kill him...  movie      1  en\n",
       "1730   我喜歡這家酒店,很多年前住過,後來又住過其他的酒店,感覺還是這家好.覺得它性價比都不錯.尤其...  hotel      1  ch\n",
       "22607  \"I gotta be straight-up - I haven't seen a fil...  movie      1  en\n",
       "17870  \"A riotous farce set in the world of glamorous...  movie      1  en\n",
       "1847   6月17日入住两天，订的是半海景房。\\r\\r\\n去的那天下着雨，因为不熟悉，坐地铁到上环站后...  hotel      1  ch\n",
       "20752  \"***SPOILERS*** ***SPOILERS*** Well, seeing as...  movie      1  en\n",
       "22350  \"This film is one of those that has a resoundi...  movie      1  en\n",
       "22015  \"In my knowledge, Largo winch was a famous Bel...  movie      0  en\n",
       "3310   \"If you enjoy the original SNL cast and shows ...  movie      0  en\n",
       "86     酒店有异味，无空调，给人的整体感觉很破旧，根本不能称为四星\\r\\r\\n\\r\\r\\n\\r\\r\\...  hotel      0  ch\n",
       "7900   \"turned out to be another failed attempt by th...  movie      0  en\n",
       "18778  \"Is there any other time period that has been ...  movie      0  en\n",
       "20130  \"\\\"Valentine\\\" is another horror movie to add ...  movie      0  en\n",
       "1039   扬子岛酒店的位置很好，逛街、购物、吃饭、娱乐都很方便，酒店对面就是大都会和太平洋商场，背后就...  hotel      1  ch\n",
       "1848   看了大家的评价不错，才选择这家的。房间无论如何不能说是三星的标准，个人感觉不如如家布置得温馨...  hotel      0  ch\n",
       "20835  \"A hot-headed cop accidentally kills a murder ...  movie      1  en\n",
       "16325  \"The annoying mouse and lullaby really got to ...  movie      0  en\n",
       "8563   \"Get this film if at all possible. You will fi...  movie      1  en\n",
       "28     \"Intrigued by the synopsis (every gay video th...  movie      0  en\n",
       "6852   \"Overlong drama that isn't capable of making a...  movie      0  en\n",
       "17099  \"Nina Foch insists that \\\"My Name is Julia Ros...  movie      1  en\n",
       "71     zero effect gets its title from the main chara...  movie      1  en\n",
       "14902  \"This is without a doubt one of the best movie...  movie      1  en\n",
       "11327  \"The Master Blackmailer, based off of Sir Arth...  movie      1  en\n",
       "18777  \"At the time I am writing this I see out of ov...  movie      1  en\n",
       "10317  \"When THE MAGIC OF LASSIE opened at Radio City...  movie      0  en\n",
       "15595  \"Absence of a GOOD PLOT, absence of decent ACT...  movie      0  en\n",
       "2266   \"I loved this episode. It is so great that all...  movie      1  en\n",
       "19547  \"THE FOURTH MAN (Paul Verhoeven - Netherlands ...  movie      1  en\n",
       "\n",
       "[6184 rows x 4 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the held-out frame returned by the first train_test_split (test_size=0.2, stratified on label).\n",
    "test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>context</th>\n",
       "      <th>field</th>\n",
       "      <th>label</th>\n",
       "      <th>lan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>22620</th>\n",
       "      <td>\"I saw this recent Woody Allen film because I'...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10868</th>\n",
       "      <td>\"Thanks to some infamous home video distributo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20524</th>\n",
       "      <td>\"I had two reasons for watching this swashbuck...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1494</th>\n",
       "      <td>第一次入住这么糟糕的三星级酒店,比招待所还差。\\r\\r\\n一、相当的脏，地毯又旧又黑，床单未...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10430</th>\n",
       "      <td>\"My Favorite part was when the credits started...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11755</th>\n",
       "      <td>\"Holes is an awesome movie. I love it a lot an...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>817</th>\n",
       "      <td>showgirls is the second major outing for the p...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4806</th>\n",
       "      <td>\"This is a real eye candy. A world made of flo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19223</th>\n",
       "      <td>\"Really bad. Why anyone thinks this is a good ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>\"Simon Pegg plays a rude crude and often out o...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>806</th>\n",
       "      <td>\"First off, if you're planning on watching thi...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1934</th>\n",
       "      <td>总体来说不怎么样的，进入当天需要门童帮助拿行李也是很不情愿的样子，服务员比你还要早就洗好澡了...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11341</th>\n",
       "      <td>\"This is actually a groovy-neat little flick, ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22648</th>\n",
       "      <td>\"I'm a sucker for a good romance, but this one...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17812</th>\n",
       "      <td>\"Some might say something like \\\"Baby Geniuses...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24001</th>\n",
       "      <td>\"The dazzling seventeen-minute dance sequence ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10976</th>\n",
       "      <td>\"I love Ashley Judd and think all of her movie...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>虽然表面看很一般,但里面装修还是很精致的,住着干净,舒服;以后去宜兴首选还是宜兴宾馆!\\r\\...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>235</th>\n",
       "      <td>\"When Rodney Dangerfield is on a roll, he's hi...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6297</th>\n",
       "      <td>\"It was probably just my DVD---but I would not...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1090</th>\n",
       "      <td>位置离我们单位很近,从价格来说,性价比很高.我要的大床房,168元,前台服务员态度很好,房间...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10813</th>\n",
       "      <td>\"This is not a movie for fans of the usual eer...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18087</th>\n",
       "      <td>\"A noble effort, I guess, but ultimately a poo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21916</th>\n",
       "      <td>\"Okay, this film probably deserves 7 out of 10...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4036</th>\n",
       "      <td>\"In THE BARBARIAN AND THE GEISHA, John Wayne p...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6554</th>\n",
       "      <td>\"Saving Grace is a nice movie to watch in a bo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11475</th>\n",
       "      <td>\"This movie frequently extrapolates quantum me...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3987</th>\n",
       "      <td>\"I have seen the movie Holes and say that it h...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20230</th>\n",
       "      <td>\"Penny Princess finds American working girl Yo...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>617</th>\n",
       "      <td>\"I really liked this film. All three stars(Con...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1127</th>\n",
       "      <td>房间干净整洁，位置也好，就是缺少部电梯，有点遗憾\\r\\r\\n\\r\\n\\r\\n</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15045</th>\n",
       "      <td>\"Yeah, I'm sure it really could be a nation . ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2643</th>\n",
       "      <td>\"Okay. This has been a favourite since I was 1...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>277</th>\n",
       "      <td>`oh behave ! \\nfelicity shagwell is one shagad...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7634</th>\n",
       "      <td>\"For me an unsatisfactory, unconvincing heist ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10135</th>\n",
       "      <td>\"Why is this movie not in the 250 best? This m...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>\"Antonio Margheriti's \\\"Danza Macabra\\\"/\\\"Cast...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>644</th>\n",
       "      <td>在预定时间到前台进行登记时，服务人员告诉我们：XC没有进行预定，后来通过多次电话.........</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10123</th>\n",
       "      <td>\"I'm not sure if this is some kind of masterpi...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16346</th>\n",
       "      <td>\"Vincente Minelli movies are usually worth you...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>总台服务还可以，房间实在不怎么样，空调很吵，床居然是一边高一边低的，这个房价实在是不值得，下...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1965</th>\n",
       "      <td>绝对是超三星标准，地处商业区，购物还是很方便的，对门有家羊杂店，绝对正宗。除了价格稍贵，总体...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1695</th>\n",
       "      <td>\"The Contaminated Man is a good film that has ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19554</th>\n",
       "      <td>\"This movie is sort of a Carrie meets Heavy Me...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>230</th>\n",
       "      <td>according to popular film opinion , a film's g...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3865</th>\n",
       "      <td>\"Walter Matthau and George Burns were a famous...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>980</th>\n",
       "      <td>in this good natured , pleasent and easy going...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16763</th>\n",
       "      <td>\"Pre-adolescent humor is present in large quan...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16061</th>\n",
       "      <td>\"This is a wonderful film. The non-stop patter...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24956</th>\n",
       "      <td>\"Of those comments here before mine, I mostly ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23743</th>\n",
       "      <td>\"I saw that when I was little and it was excel...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>358</th>\n",
       "      <td>这个宾馆太差了，我一辈子都不会再住了，电梯坏，设施陈旧，电视差，洗澡的液体质量差，和星级完全...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>0</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13575</th>\n",
       "      <td>\"Having just watched this movie, I almost feel...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1938</th>\n",
       "      <td>房间不错，虽然有点小，但很实用。宽带很快。不过前面的朋友说过的隔音效果差，确实有点。细察发现...</td>\n",
       "      <td>hotel</td>\n",
       "      <td>1</td>\n",
       "      <td>ch</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18560</th>\n",
       "      <td>\"I first watched the Walking Tall movies when ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6113</th>\n",
       "      <td>\"If I only had one camera that was accidentall...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24314</th>\n",
       "      <td>\"Olivier Gruner stars as Jacques a foreign exc...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16322</th>\n",
       "      <td>\"Yet another British romantic comedy which aud...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23497</th>\n",
       "      <td>\"\\\"Sir\\\" John Gielgud must have become senile ...</td>\n",
       "      <td>movie</td>\n",
       "      <td>0</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18457</th>\n",
       "      <td>\"The late, great Robert Bloch (author of PSYCH...</td>\n",
       "      <td>movie</td>\n",
       "      <td>1</td>\n",
       "      <td>en</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3092 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 context  field  label lan\n",
       "22620  \"I saw this recent Woody Allen film because I'...  movie      0  en\n",
       "10868  \"Thanks to some infamous home video distributo...  movie      0  en\n",
       "20524  \"I had two reasons for watching this swashbuck...  movie      0  en\n",
       "1494   第一次入住这么糟糕的三星级酒店,比招待所还差。\\r\\r\\n一、相当的脏，地毯又旧又黑，床单未...  hotel      0  ch\n",
       "10430  \"My Favorite part was when the credits started...  movie      0  en\n",
       "11755  \"Holes is an awesome movie. I love it a lot an...  movie      1  en\n",
       "817    showgirls is the second major outing for the p...  movie      1  en\n",
       "4806   \"This is a real eye candy. A world made of flo...  movie      1  en\n",
       "19223  \"Really bad. Why anyone thinks this is a good ...  movie      0  en\n",
       "31     \"Simon Pegg plays a rude crude and often out o...  movie      1  en\n",
       "806    \"First off, if you're planning on watching thi...  movie      1  en\n",
       "1934   总体来说不怎么样的，进入当天需要门童帮助拿行李也是很不情愿的样子，服务员比你还要早就洗好澡了...  hotel      0  ch\n",
       "11341  \"This is actually a groovy-neat little flick, ...  movie      1  en\n",
       "22648  \"I'm a sucker for a good romance, but this one...  movie      0  en\n",
       "17812  \"Some might say something like \\\"Baby Geniuses...  movie      0  en\n",
       "24001  \"The dazzling seventeen-minute dance sequence ...  movie      1  en\n",
       "10976  \"I love Ashley Judd and think all of her movie...  movie      1  en\n",
       "51     虽然表面看很一般,但里面装修还是很精致的,住着干净,舒服;以后去宜兴首选还是宜兴宾馆!\\r\\...  hotel      1  ch\n",
       "235    \"When Rodney Dangerfield is on a roll, he's hi...  movie      0  en\n",
       "6297   \"It was probably just my DVD---but I would not...  movie      0  en\n",
       "1090   位置离我们单位很近,从价格来说,性价比很高.我要的大床房,168元,前台服务员态度很好,房间...  hotel      1  ch\n",
       "10813  \"This is not a movie for fans of the usual eer...  movie      1  en\n",
       "18087  \"A noble effort, I guess, but ultimately a poo...  movie      0  en\n",
       "21916  \"Okay, this film probably deserves 7 out of 10...  movie      1  en\n",
       "4036   \"In THE BARBARIAN AND THE GEISHA, John Wayne p...  movie      1  en\n",
       "6554   \"Saving Grace is a nice movie to watch in a bo...  movie      1  en\n",
       "11475  \"This movie frequently extrapolates quantum me...  movie      0  en\n",
       "3987   \"I have seen the movie Holes and say that it h...  movie      1  en\n",
       "20230  \"Penny Princess finds American working girl Yo...  movie      1  en\n",
       "617    \"I really liked this film. All three stars(Con...  movie      1  en\n",
       "...                                                  ...    ...    ...  ..\n",
       "1127              房间干净整洁，位置也好，就是缺少部电梯，有点遗憾\\r\\r\\n\\r\\n\\r\\n  hotel      1  ch\n",
       "15045  \"Yeah, I'm sure it really could be a nation . ...  movie      0  en\n",
       "2643   \"Okay. This has been a favourite since I was 1...  movie      1  en\n",
       "277    `oh behave ! \\nfelicity shagwell is one shagad...  movie      1  en\n",
       "7634   \"For me an unsatisfactory, unconvincing heist ...  movie      0  en\n",
       "10135  \"Why is this movie not in the 250 best? This m...  movie      1  en\n",
       "42     \"Antonio Margheriti's \\\"Danza Macabra\\\"/\\\"Cast...  movie      1  en\n",
       "644    在预定时间到前台进行登记时，服务人员告诉我们：XC没有进行预定，后来通过多次电话.........  hotel      0  ch\n",
       "10123  \"I'm not sure if this is some kind of masterpi...  movie      1  en\n",
       "16346  \"Vincente Minelli movies are usually worth you...  movie      0  en\n",
       "56     总台服务还可以，房间实在不怎么样，空调很吵，床居然是一边高一边低的，这个房价实在是不值得，下...  hotel      0  ch\n",
       "1965   绝对是超三星标准，地处商业区，购物还是很方便的，对门有家羊杂店，绝对正宗。除了价格稍贵，总体...  hotel      1  ch\n",
       "1695   \"The Contaminated Man is a good film that has ...  movie      1  en\n",
       "19554  \"This movie is sort of a Carrie meets Heavy Me...  movie      1  en\n",
       "230    according to popular film opinion , a film's g...  movie      0  en\n",
       "3865   \"Walter Matthau and George Burns were a famous...  movie      1  en\n",
       "980    in this good natured , pleasent and easy going...  movie      1  en\n",
       "16763  \"Pre-adolescent humor is present in large quan...  movie      1  en\n",
       "16061  \"This is a wonderful film. The non-stop patter...  movie      1  en\n",
       "24956  \"Of those comments here before mine, I mostly ...  movie      1  en\n",
       "23743  \"I saw that when I was little and it was excel...  movie      1  en\n",
       "358    这个宾馆太差了，我一辈子都不会再住了，电梯坏，设施陈旧，电视差，洗澡的液体质量差，和星级完全...  hotel      0  ch\n",
       "13575  \"Having just watched this movie, I almost feel...  movie      0  en\n",
       "1938   房间不错，虽然有点小，但很实用。宽带很快。不过前面的朋友说过的隔音效果差，确实有点。细察发现...  hotel      1  ch\n",
       "18560  \"I first watched the Walking Tall movies when ...  movie      1  en\n",
       "6113   \"If I only had one camera that was accidentall...  movie      0  en\n",
       "24314  \"Olivier Gruner stars as Jacques a foreign exc...  movie      0  en\n",
       "16322  \"Yet another British romantic comedy which aud...  movie      0  en\n",
       "23497  \"\\\"Sir\\\" John Gielgud must have become senile ...  movie      0  en\n",
       "18457  \"The late, great Robert Bloch (author of PSYCH...  movie      1  en\n",
       "\n",
       "[3092 rows x 4 columns]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "valid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Write the train, valid and dev frames to pickle files under `path`.\n",
    "train.to_pickle(path + 'train.pkl')\n",
    "valid.to_pickle(path + 'valid.pkl')\n",
    "dev.to_pickle(path + 'dev.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
